## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: viridisLite
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Plotly
# Load the `instacart` dataset (presumably from an attached data package --
# the library() call is not visible here) and tidy it: clean the column names
# with janitor, store `department` as a factor, and drop the two columns that
# are not used below.
data("instacart")
instacart <- instacart %>%
  janitor::clean_names() %>%
  mutate(department = as.factor(department)) %>%
  select(-c(user_id, eval_set)) %>%
  ungroup()
Figure 2
# Per-department row counts among single-item orders.
barfig <- instacart %>%
  # An order is "single item" when its largest add_to_cart_order is 1.
  group_by(order_id) %>%
  filter(max(add_to_cart_order) == 1) %>%
  ungroup() %>%
  # add_count() groups by department_id itself, so no explicit group_by() is
  # needed; sort = TRUE puts the most frequent departments first.
  add_count(department_id, sort = TRUE, name = "sum_dept_ordered") %>%
  # Collapse to one row per (department, count) pair.
  distinct(department, sum_dept_ordered) %>%
  # Order the factor levels by count so the bars are drawn in that order.
  mutate(department = fct_reorder(department, sum_dept_ordered))
# Bar chart of `barfig`: one bar per department, coloured by department using
# the "Set1" palette.
barfig %>%
  plot_ly(
    x = ~department,
    y = ~sum_dept_ordered,
    color = ~department,
    type = "bar",
    colors = "Set1"
  ) %>%
  layout(
    title = "Number of Department orders with 1 item",
    xaxis = list(title = "Department", zeroline = TRUE),
    yaxis = list(title = "Number of orders from department", zeroline = TRUE)
  )
## How does the number of items ordered affect the % of produce?
# Builds one row per distinct (order_id, num_items_ordered, num_produce,
# order_dow) combination for orders that contain at least one produce item.
# Orders with no produce are excluded by the department filter, so the
# percentages describe produce-containing orders only.
perc_produced <- instacart %>%
  select(-aisle_id, -aisle, -department_id) %>%
  group_by(order_id) %>%
  # Order size is taken as the largest add_to_cart_order within the order.
  mutate(num_items_ordered = max(add_to_cart_order)) %>%
  # Still grouped by order_id here, so this counts rows per order x department.
  add_count(department, name = "dept_count_ordered") %>%
  ungroup() %>%
  filter(department == "produce") %>%
  distinct(order_id, num_items_ordered, dept_count_ordered, order_dow) %>%
  rename(num_produce = dept_count_ordered) %>%
  mutate(
    order_dow = as.factor(order_dow),
    percent_produce = 100 * num_produce / num_items_ordered,
    # Produce : non-produce ratio; Inf when the whole order is produce.
    ratio_produce = num_produce / (num_items_ordered - num_produce),
    # case_when() uses the first matching branch, so each boundary value lands
    # in the earlier bucket: "2-5" is effectively 3-5, "5-10" is 6-10,
    # "10-20" is 11-20 and "20-50" is 21-50. Single-item orders match no
    # branch and become NA.
    order_group = case_when(
      num_items_ordered == 2 ~ "2 items",
      between(num_items_ordered, 2, 5) ~ "2-5",
      between(num_items_ordered, 5, 10) ~ "5-10",
      between(num_items_ordered, 10, 20) ~ "10-20",
      between(num_items_ordered, 20, 50) ~ "20-50",
      num_items_ordered > 50 ~ "50+"
    ),
    # Put the buckets in size order rather than alphabetical order.
    order_group = fct_relevel(
      order_group,
      "2 items", "2-5", "5-10", "10-20", "20-50", "50+"
    )
  ) %>%
  # Drop the unbucketed (single-item) orders. Test for missingness directly
  # instead of comparing against the string "NA".
  filter(!is.na(order_group))
# Box plots of the produce share of each order, one box per order-size bucket
# (`order_group`), coloured with the "Set2" palette.
perc_produced %>%
  plot_ly(
    y = ~percent_produce,
    color = ~order_group,
    type = "box",
    colors = "Set2"
  ) %>%
  layout(
    # Title typo fixed: "size or order" -> "size of order".
    title = "Distribution of percent produce of carts by size of order",
    xaxis = list(title = "Number of items in order", zeroline = TRUE),
    yaxis = list(title = "Percent of order made up of produce", zeroline = TRUE)
  )